#############################################
#############################################
### REPLICATION MATERIAL                  ###  
### Gender Quotas and Political Knowledge ###
### Quotas Knowledge Time Analysis (main) ###
#############################################
#############################################

# Libraries needed
library(tidyverse)
library(stargazer)
library(haven)
library(lme4)
library(rstanarm)
library(tidybayes)
library(BayesPostEst)
library(ggplot2)
library(ggridges)
library(magrittr)
library(dplyr)
library(purrr)
library(forcats)
library(tidyr)
library(modelr)
library(ggdist)
library(tidybayes)
library(cowplot)
library(rstan)
library(RColorBrewer)
library(gganimate)
library(wrangle)

# Set plot theme
theme_set(theme_tidybayes() + panel_border())

# Set seed and stan options (to make it run faster)
set.seed(123)
options(mc.cores = parallel::detectCores())

# Read RDS data
analysis.2 <- readRDS("data/analysis/analysis_2_df.rds")

# Modify full dataset
analysis.int <- analysis.2 %>%
  mutate(female = recode(gender,
                         "1" = 0,
                         "0" = 1)) %>%
  select(ID, country, name, year, know_q_num, know_correct, 
         female, quota, age, education, GDP_growth, counter) %>%
  mutate(age = cut(age, 5,labels = FALSE)) %>%
  mutate(education = cut(education, 5, labels = FALSE)) %>%
  mutate(treated = ifelse(name == "Belgium" | name == "France" | name == "Greece" |
                            name == "Ireland" | name == "Portugal" |
                            name == "Spain" , 1, 0)) %>%
  write_rds("data/analysis/analysis_int.rds") %>%
  glimpse()

# Binomial version of the dataset - full sample
analysis.int.binomial <- analysis.int %>%
  group_by(female, quota, age, treated, counter, education, 
           GDP_growth, country, year, know_q_num) %>%
  summarize(n_correct = sum(know_correct),
            n_incorrect = n() - sum(know_correct)) %>%
  write_rds("data/analysis/analysis_int_binomial.rds") %>%
  glimpse()

###################################
###################################
## MODEL AND RESULTS TABLE (A3) ###
###################################
###################################


#Model
fit.time <- stan_glmer(cbind(n_correct, n_incorrect) ~ 
                         female*quota*age +
                         female*age*counter +
                         education + 
                         GDP_growth + 
                         (1 | country) +
                         (1 | year) +
                         (1 | know_q_num) + 
                         (1 + female | country:know_q_num), 
                       data = analysis.int.binomial,
                       family=binomial(link="logit"),
                       iter = 6000,
                       control = list(adapt_delta = 0.99))

#######################
### Summary - TABLE A3
#######################
summary(fit.time)

print(fit.time, digits = 5)
mcmcReg(fit.time, format = "latex")
mcmcTab(fit.time)

#######################
### Convergence checks
#######################

# Rhat - convergence ok
dev.copy(png,'rhat_fit_time.png')
plot(fit.time, "rhat")
dev.off()

# Prior summary
prior_summary(fit.time)

#################################
### PREDICTED PROBS DATASETS  ###
#################################

## Dataset with predicted probabilities (no random effects)
pred_fit.time_noRE <- crossing(country = unique(analysis.int.binomial$country), 
                               quota = c(0,1), 
                               female = c(0,1),
                               counter = c(0:20),
                               know_q_num = c(1:10),
                               age = c(1:5),
                               education = mean(analysis.int.binomial$education, 
                                                na.rm = T),
                               GDP_growth = mean(analysis.int.binomial$GDP_growth, 
                                                 na.rm = T)) %>%
  add_linpred_draws(fit.time, 
                    scale = "response", 
                    re_formula = NA) %>%
  write_rds("data/analysis/pred_fit_time_noRE.rds")

## Dataframe with draws
draws <- fit.time %>%
  spread_draws(b[term,group]) %>%
  filter(str_detect(group, "country:know_q_num:")) %>%
  separate(group, c("country_2", "q_num_2", "country", "q_num"), ":") %>%
  write_rds("/data/analysis/draws_fit_time.rds")

# Dataframe with predicted probabilities (with random effects)
pred_fit.time_c_RE <- crossing(country = unique(analysis.int.binomial$country), 
                               quota = c(0,1), 
                               female = c(0,1),
                               know_q_num = c(1:10),
                               age = c(1:5),
                               education = mean(analysis.int.binomial$education, 
                                                na.rm = T),
                               GDP_growth = mean(analysis.int.binomial$GDP_growth, 
                                                 na.rm = T)) %>%
  add_linpred_draws(fit.time, 
                    scale = "response", 
                    re_formula = ~ (1 + female | country:know_q_num)) %>%
  write_rds("/data/analysis/pred_fit_time_c_RE.rds")

#############
#############
### PLOTS ###
#############
#############


####################################################################
### Figure 4: Predicted Probabilites - Time Trends (for 15-31)   ###
#################################################################### 

plot_time_1 <- readRDS("data/analysis/pred_fit_time_noRE.rds")
# Smaller dataset (only keep data for up to t+5 post intervention)

plot_time_small <- filter(plot_time_1, counter < 7)

plot_time_small$female <- factor(plot_time_small$female,
                                 labels = c("Male", "Female"))
plot_time_small$quota <- factor(plot_time_small$quota, 
                                labels = c("No Quotas", "Quotas"))
plot_time_small$age <- factor(plot_time_small$age, 
                              labels = c("15-31", "32-48", "49-65", "66-82", "83-99"))
plot_time_small$counter <- factor(plot_time_small$counter, 
                                  labels = c("t", "t+1", "t+2", "t+3", "t+4", "t+5", "t+6"))

# Smaller dataset (only keep data for the young people)
plot_time_small_young <- filter(plot_time_small, age == "15-31")

plot_time_small_young %>%
  ggplot(aes(x = counter, y = .epred, fill = female, color = female)) +
  stat_halfeye() +
  facet_grid(quota ~ .) +
  coord_flip() +
  scale_color_grey(name = "Gender",
                   labels = c("Male", "Female")) +
  scale_fill_grey(name = "Gender",
                  labels = c("Male", "Female")) +
  labs(x = "Years Post Intervention", 
       y = "Predicted Probabilities - Age Group 15-31")
ggsave("plots/main/fig-4-grayscale.png")


#################################################
### Figure 5: Percent Change - all age groups ###
################################################# 

#Full results dataset
plot_time_2 <- readRDS("data/analysis/pred_fit_time_c_RE.rds")

# Small dataset (only until t+6)
plot_time_2_small <- filter(plot_time_2, counter < 7) 
write_rds(plot_time_2_small, "data/analysis/pred_fit_time_c_RE_SMALL.rds")

percent_change <- plot_time_2_small %>%
  ungroup() %>% 
  mutate(quota = case_when(quota == 1 ~ "quota",
                           quota == 0 ~ "noquota"),
         female = case_when(female == 0 ~ "man",
                            female == 1 ~ "woman")) %>%
  select(-.row) %>% 
  pivot_wider(names_from = c(quota, female), values_from = .epred) %>%
  mutate(second_difference = (quota_woman - quota_man) - (noquota_woman - noquota_man)) %>%
  mutate(difference_noquota = (noquota_woman - noquota_man)) %>%
  mutate(percent_change = second_difference/difference_noquota) %>%
  mutate(quotas = ifelse(country == 1 | country == 2 | country == 8 | country == 10 | country == 11 | country == 12, 1, 0)) %>%
  glimpse()

percent_change$age <- factor(percent_change$age, 
                             labels = c("15-31", "32-48", "49-65", "66-82", "83-99"))
percent_change$counter <- factor(percent_change$counter, 
                                 labels = c("t", "t+1", "t+2", "t+3", "t+4", "t+5", "t+6"))

percent_change %>% 
  ggplot(aes(x = counter, 
             y = percent_change,
             color = age,
             fill = age)) + 
  coord_flip() +
  stat_pointinterval() +
  facet_grid(factor(age, levels=rev(unique(age))) ~ .) +
  scale_fill_grey(name = "Age Groups",
                  labels = c("15-31", "32-48", "49-65", "66-82", "83-99")) +
  scale_color_grey(name = "Age Groups") +
  geom_hline(yintercept = 0) +
  labs(x = "Years from Intervention",
       y = "Percent Change")

ggsave("plots/main/fig-5-time.png")

########################################################
### Figure A1: Predicted Probabilites - Time Trends  ###
########################################################


plot_time_small %>%
  ggplot(aes(x = counter, y = .epred, fill = female, shape = female)) +
  stat_halfeye() +
  facet_grid(quota ~ .) +
  coord_flip() +
  scale_shape_manual(name = "Gender",
                     labels = c("Male", "Female"),
                     values = c(15, 17)) +
  scale_fill_brewer(name = "Gender",
                    labels = c("Male", "Female")) +
  labs(x = "Years Post Intervention", y = "Predicted Probabilities")
ggsave("plots/main/fig-2-time.png")

###########################################################################
### Figure A2: Predicted Probabilites - Time Trends (age, quotas, time) ###
###########################################################################

plot_time_small %>%
  ggplot(aes(x = counter, y = .epred, color = female, shape = female)) +
  stat_pointinterval() +
  facet_grid(quota ~ age) +
  scale_shape_manual(name = "Gender",
                     labels = c("Male", "Female"),
                     values = c(15, 17)) +
  scale_color_brewer(name = "Gender",
                     labels = c("Male", "Female")) +
  labs(x = "Years Post Intervention", y = "Predicted Probabilities")                          
ggsave("plots/main/fig-3-time.png")


